{"decoding_method": "beam", "trf_num_heads": 8, "cpp_pretrain_scheme": "", "rnn_pooling": "last", "avg_first": false, "lambda_nsp_pretrain": 1.0, "decoding_alpha": 1.0, "rnn_bidirect_encode": true, "lambda_lm_pretrain_p": 2.0, "mask_prob_summary": 0.0, "lambda_cpp_pretrain": 1.0, "lifted_struct_margin": 0.3, "noisy_paragraph_scheme": "shuffle_sentences", "bert_pretrain_mask_rate_input": 0.12, "cluster_margin_multiplier": 0.3, "trf_postprocess_dropout": 0.1, "noisy_paragraph_prob": 0.5, "first_pretrain_steps": 0, "d_hidden_eq_hidden": false, "length_embed": false, "latent_size": 256, "lambda_lm_pretrain_s": 4.0, "clip_norm": 1.0, "add_critic": false, "gd_step_ratio": 2, "bert_pretrain_mask_type": "both", "bert_pretrain_enc": false, "trf_relu_dropout": 0.1, "lm_pretrain_dec": true, "max_sent_length": 60, "nsp_pretrain_not_next_diff_paragraph_prob": 0.0, "sbp_pretrain": false, "lambda_sbp_pretrain": 1.0, "lambda_psp": 0.0, "trf_pooling": "first", "dml_weight": 0.5, "lambda_bert_pretrain_p": 1.0, "mask_type": "both", "lambda_bert_pretrain_s": 1.0, "trf_hidden_size": 128, "rnn_num_layers": 1, "rnn_hidden_size": 128, "d_prior_hidden": 128, "tie_sent_para_dec": true, "cluster_margin_type": "nmi", "lambda_p": 2.0, "lambda_s": 4.0, "trf_num_layers": 2, "gs_temp": 2.0, "adv_prior_weight": 0.5, "trf_attention_dropout": 0.1, "triplet_margin": 0.3, "lambda_c": 0.0, "lambda_n": 0.0, "cluster_enable_pam_finetuning": false, "mask_rate_summary": 0.0, "lambda_c_avg": 0.0, "decoding_beam_size": 1, "mask_prob_input": 0.15, "nsp_pretrain_rely_on_bert": false, "tie_sent_para_enc": true, "pretrain_as_autoencoder": false, "out_domain_pretrain_steps": 0, "train_phase_subset": "all", "lambda_c_avg2": 0.0, "nsp_pretrain": true, "sop_num_sents_per_paragraph": 5, "sop_pretrain": false, "step_psp_on": 5000, "dml": "", "max_paragraph_length": 300, "vocab_size": 32586, "embedding_size": 128, "d_hidden": 128, "max_sent_per_paragraph": 5, "encoder_type": "transformer", "adv_weight": 0.5, "add_prior_critic": false, "decoder_type": "rnn", "learning_rate": 0.001, "npairs_reg_lambda": 0.002, "decode_length": 13, "in_domain_pretrain_steps": 100000, "mask_rate_input": 0.8, "lambda_p2": 0.0, "tie_embeddings": true, "pretrain_order": "simultaneous", "trf_filter_size": 512, "lambda_sop_pretrain": 1.0}